#####################
#####################

###Replication code for 
###Quotas and Party Priorities:
###Direct and Indirect Effects of Quota Laws 

###Matching procedure

#####################
#####################

## NOTE: the workspace is deliberately NOT cleared with rm(list = ls()).
## That call would delete the caller's objects whenever this script is
## source()d, and it does not reset loaded packages or options anyway.
## Run the script in a fresh R session for a clean replication.

## Load the full data set. load() returns (invisibly) the names of the
## objects it restored; verify that the file really supplied `prq_data`
## so that a stale object of the same name can never be used by accident.
loaded_objects <- load("data_for_matching.RData")
stopifnot("prq_data" %in% loaded_objects)
head(prq_data)

## Working copy; all later steps modify `blah` and leave `prq_data` as loaded
blah <- prq_data

## Descriptive checks on the full data. The counts in the trailing notes
## are the values recorded by the original author for the replication data.
as.data.frame(table(blah$party))       ## 132 parties
as.data.frame(table(blah$countryname)) ## 21 countries
as.data.frame(table(blah$year))        ## 41 election-years, 1969 - 2011

## Observations per party among rows with lag1.quota == 1
## (recorded: 29 such observations; 24 parties that get a quota, in 4 countries)
as.data.frame(table(blah$party[which(blah$lag1.quota == 1)]))

## install.packages("cem", dependencies=TRUE, repos='http://cran.rstudio.com/')
library(cem)

## Observations per party among rows with quota == 1
table(blah$party[which(blah$quota == 1)])

## Step 1 of 2: define qparty (party-level flag), to be followed by dropping
## all the post-quota observations.

## Collapse to one row per party by taking the mean of every column
## (NAs removed before averaging).
aggdata <- aggregate(x = blah, by = list(blah$party), FUN = mean, na.rm = TRUE)

## qparty = 1 when the party's mean lag1.quota is non-zero, 0 otherwise.
## Assuming lag1.quota is a 0/1 indicator, that means "ever under a quota".
## Parties whose mean is missing keep the default of 0.
aggdata$qparty <- 0
aggdata$qparty[which(aggdata$lag1.quota != 0)] <- 1
table(aggdata$qparty) ## recorded: 24 parties that get a quota

## Attach the party-level flag to every row of the original data

set <- c("party", "qparty")
agg_temp <- aggdata[set]
nrow(agg_temp)

## Left join on party: every row of blah is kept
test2 <- merge(x = blah, y = agg_temp, by = "party", all.x = TRUE)
nrow(test2)
head(test2)
table(test2$qparty)
blah <- test2

## Keep only observations with quota == 0, i.e. drop everything observed
## once a quota law is implemented (rows with missing quota are dropped too)
newdata <- blah[which(blah$quota == 0), ]
nrow(newdata)
table(newdata$qparty)


## Build the party-level data set used for matching: one row per party,
## each column averaged over that party's pre-quota observations
aggdata2 <- aggregate(
  x = newdata,
  by = list(newdata$party),
  FUN = mean,
  na.rm = TRUE
)
nrow(aggdata2) ## recorded: 128 parties, bc 4 only existed after a quota
aggdata2[["countryname"]] <- NULL

## Keep only the matching covariates, the treatment flag (qparty), the
## party identifier, and the DVs
vars <- c(
  "year", "parfam", "lag1.pervote", "lag1.pfem_new", "lag1.fplabfo",
  "lag1.partyquota", "qparty", "party", "per503", "per504", "rile"
)
new <- aggdata2[vars]
nrow(new) # recorded: 128

## Nearest-neighbour matching on Mahalanobis distance.
## Treatment indicator: qparty. Matches are additionally required to agree
## exactly on party family (parfam).

library(MatchIt)
set.seed(12345)
nearest.match <- matchit(
  formula = qparty ~ parfam + year + lag1.pervote + lag1.partyquota +
    lag1.pfem_new,
  exact = "parfam",
  data = new,
  method = "nearest",
  distance = "mahalanobis"
)

## Matched pairs, identified by row of `new`; to build the table of matched
## parties, open `new` (e.g. fix(new)) and look the rows up.
## Recorded by the original author: 20 parties matched.
nearest.match[["match.matrix"]]

summary(nearest.match) ## balance summary before and after matching
plot(nearest.match)    ## QQ plots

## Matched sample only
newdat <- match.data(nearest.match)
nrow(newdat)
table(newdat$party) ## parties present in the matched sample

############################
## Return to the decompressed (un-aggregated) data

## Flag every party that is in the matched sample
newdat$matched2 <- 1
set <- c("party", "matched2")
nd2 <- newdat[, set]

## Left join on party: all rows of blah are kept, and rows belonging to
## parties that are not in the matched sample get matched2 = NA
test4 <- merge(x = blah, y = nd2, by = "party", all.x = TRUE)
head(test4)
table(test4$party[which(test4$matched2 == 1)]) ## recorded: 40 parties
## The two row counts below should agree if the join duplicated no rows
nrow(test4)
nrow(blah)
blah <- test4

## Keep the observations of matched parties only (NA flags are dropped)
dat2 <- blah[which(blah$matched2 == 1), ]
nrow(dat2) ## recorded: 282 obs, 40 parties, 16 countries, 1969 -- 2011

save(dat2, file = "matched_data.RData")













